package cn.itcast.hello;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
import org.apache.spark.sql.SparkSession;

import java.util.Arrays;
import static org.apache.spark.sql.functions.col;

/**
 * Author itcast
 * Desc Demonstrates a SparkSQL WordCount in Java, computing the same word
 *      counts twice: once via the SQL API (temp view + query) and once via
 *      the DataFrame DSL (groupBy/count/orderBy).
 */
public class JavaSparkDemo03 {
    public static void main(String[] args) {
        // 0. Prepare the environment: local-mode session using all available cores.
        SparkSession spark = SparkSession.builder().appName("JavaSparkDemo").master("local[*]").getOrCreate();
        // Reduce log noise so the show() output is readable.
        spark.sparkContext().setLogLevel("WARN");

        // 1. Load the input file as a Dataset of lines (one String per line).
        Dataset<String> ds = spark.read().textFile("data/input/words.txt");

        // 2. Split each line on single spaces, flattening into one word per row.
        //    The explicit (String line) parameter type avoids ambiguity between
        //    the Scala and Java flatMap overloads.
        Dataset<String> wordsDS = ds.flatMap(
                (String line) -> Arrays.asList(line.split(" ")).iterator(),
                Encoders.STRING());

        // ==== SQL style: register a temporary view and aggregate with a query.
        // textFile() names its single column "value", so the query groups on it.
        wordsDS.createOrReplaceTempView("t_word");
        String sql = "select value, count(*) as counts " +
                "from t_word " +
                "group by value " +
                "order by counts desc";
        spark.sql(sql).show();

        // ==== DSL style: the same aggregation expressed with the typed API.
        // groupBy("value").count() yields a "count" column we sort descending.
        wordsDS.groupBy("value")
                .count()
                .orderBy(col("count").desc())
                .show();

        // 3. Results were printed by show() above; nothing further to write out.

        // 4. Release resources held by the Spark session.
        spark.stop();
    }
}
